# Loading the required libraries
import os
import cv2
import torch
import numpy as np
import matplotlib.pyplot as plt
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.transforms import functional as F
# Directory layout: raw inputs in, per-object crops out.
input_images_dir = 'data/input_images'
output_images_dir = 'data/segmented_objects'

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load COCO-pretrained Mask R-CNN. `pretrained=True` has been deprecated
# since torchvision 0.13 (it emitted UserWarnings here); the string alias
# "DEFAULT" resolves to MaskRCNN_ResNet50_FPN_Weights.DEFAULT without
# needing an extra import.
model = maskrcnn_resnet50_fpn(weights="DEFAULT")
model = model.to(device)
model.eval()  # inference mode: freezes dropout / batch-norm statistics
# Preprocess image
def preprocess_image(image_path, device, target_size=(800, 800)):
    """Read an image from disk and return it as a (1, C, H, W) float tensor.

    The image is converted from OpenCV's BGR channel order to RGB and
    resized to ``target_size`` before being moved to ``device``.

    Raises:
        ValueError: if the file cannot be read as an image.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        raise ValueError(f"Failed to load image: {image_path}")
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, target_size)
    return F.to_tensor(resized).unsqueeze(0).to(device)
# Collect the image files to preview.
image_files = [f for f in os.listdir(input_images_dir) if f.endswith(('.jpg', '.jpeg', '.png'))]

# Lay the previews out on a 3-wide grid, one row per three images.
num_images = len(image_files)
num_columns = 3
num_rows = (num_images + num_columns - 1) // num_columns
fig, axes = plt.subplots(num_rows, num_columns, figsize=(15, 5 * num_rows))
axes = axes.flatten()

for idx, image_file in enumerate(image_files):
    image_path = os.path.join(input_images_dir, image_file)
    try:
        tensor = preprocess_image(image_path, device)
        # Tensor (1, C, H, W) in [0, 1] -> numpy (H, W, C) scaled to [0, 255].
        preview = tensor.squeeze(0).permute(1, 2, 0).cpu().numpy()
        preview = (preview * 255).astype(np.uint8)
        axes[idx].imshow(preview)
        axes[idx].set_title(image_file)
    except Exception as e:
        # Surface the failure on the subplot instead of aborting the grid.
        print(e)
        axes[idx].text(0.5, 0.5, str(e), ha='center', va='center', fontsize=12, color='red')
    axes[idx].axis('off')

# Hide the grid cells that have no image assigned.
for extra in range(num_images, len(axes)):
    axes[extra].axis('off')

plt.tight_layout()
plt.show()
# Ensure output directory exists (no-op if it is already there)
os.makedirs(output_images_dir, exist_ok=True)
C:\Users\Rahul\anaconda3\lib\site-packages\torchvision\models\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. warnings.warn( C:\Users\Rahul\anaconda3\lib\site-packages\torchvision\models\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=MaskRCNN_ResNet50_FPN_Weights.COCO_V1`. You can also use `weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT` to get the most up-to-date weights. warnings.warn(msg)
# Postprocess and visualize segmentation
def postprocess_image(image_tensor):
    """Convert a (1, C, H, W) float tensor in [0, 1] back to a uint8 HWC array."""
    chw = image_tensor.squeeze().cpu().numpy()
    hwc = chw.transpose(1, 2, 0)
    return (hwc * 255).astype(np.uint8)
def segment_image(image_path):
    """Run Mask R-CNN on one image and overlay mask contours on the original.

    Uses the module-level ``model`` and ``device``. Only detections scoring
    above 0.5 are drawn; each gets a random contour colour.
    """
    image_tensor = preprocess_image(image_path, device)
    with torch.no_grad():
        prediction = model(image_tensor)

    outputs = prediction[0]
    masks = outputs['masks'].cpu().numpy()
    labels = outputs['labels'].cpu().numpy()
    scores = outputs['scores'].cpu().numpy()

    original_image = cv2.imread(image_path)
    original_image_rgb = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(10, 10))
    plt.imshow(original_image_rgb)
    for score, mask in zip(scores, masks):
        if score > 0.5:
            # Threshold the soft mask to a binary map before contouring.
            binary = (mask[0] > 0.5).astype(np.uint8)
            plt.contour(binary, colors=[np.random.rand(3,)])
    plt.title('Segmented Image')
    plt.axis('off')
    plt.show()
# Process each image: show the contour overlay for every collected file.
for image_file in image_files:
    image_path = os.path.join(input_images_dir, image_file)
    segment_image(image_path)
Designed and incorporated a model to segment objects inside an input image. This involves employing the Mask R-CNN model to segment, or draw a bounding box around, the various objects in the image.
Model Implementation: Used a model Mask R-CNN to process the image and identify segmented regions for each object.
Code Output: Developed code to read an image, apply the segmentation model, and generate segmented regions.
Visual Output: Display the original images with overlaid segmented objects to visually verify the segmentation results.
Each extracted object is assigned a unique ID for tracking.
import pandas as pd
import cv2
import os
import numpy as np
import torch
from torchvision.transforms import functional as F
# Define directories
segmented_objects_dir = 'data/segmented_objects'  # per-object crops are written here
metadata_file = 'data/metadata.csv'  # index mapping object ids to their files
def extract_and_save_objects(image_path, master_id):
    """Segment one image and save each detected object as its own file.

    Runs the module-level Mask R-CNN ``model`` on ``image_path``, keeps
    detections scoring above 0.5, and writes each object (original pixels
    inside its mask, black elsewhere) to ``segmented_objects_dir`` as
    ``{master_id}_{object_id}.jpg``.

    Args:
        image_path: path of the source image on disk.
        master_id: identifier of the parent image; prefixes every object id.

    Returns:
        List of dicts with ``master_id``, ``object_id`` and ``file_path``
        for every saved object.

    Raises:
        ValueError: if the image cannot be read.
    """
    original_image = cv2.imread(image_path)
    if original_image is None:
        raise ValueError(f"Failed to load image: {image_path}")
    original_image_rgb = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_height, original_width, _ = original_image_rgb.shape

    # preprocess_image resizes the input to 800x800, so the predicted masks
    # come back at that resolution, not the original one.
    image_tensor = preprocess_image(image_path, device)
    with torch.no_grad():
        prediction = model(image_tensor)
    masks = prediction[0]['masks'].cpu().numpy()
    scores = prediction[0]['scores'].cpu().numpy()

    os.makedirs(segmented_objects_dir, exist_ok=True)

    metadata = []
    object_id = 1
    for i in range(len(masks)):
        if scores[i] > 0.5:
            mask = (masks[i, 0] > 0.5).astype(np.uint8)
            # INTER_NEAREST keeps the mask strictly binary; the previous
            # default (bilinear) interpolated values along the boundary,
            # which corrupts a 0/1 mask.
            mask_resized = cv2.resize(mask, (original_width, original_height),
                                      interpolation=cv2.INTER_NEAREST)
            masked_image = np.zeros_like(original_image_rgb)
            masked_image[mask_resized == 1] = original_image_rgb[mask_resized == 1]

            # Save the object image (convert back to BGR for OpenCV).
            object_file_path = os.path.join(segmented_objects_dir, f'{master_id}_{object_id}.jpg')
            cv2.imwrite(object_file_path, cv2.cvtColor(masked_image, cv2.COLOR_RGB2BGR))

            metadata.append({
                'master_id': master_id,
                'object_id': object_id,
                'file_path': object_file_path
            })
            object_id += 1
    return metadata
# Process every input image, accumulating one metadata row per saved object.
all_metadata = []
for image_file in os.listdir(input_images_dir):
    if not image_file.endswith(('.jpg', '.jpeg', '.png')):
        continue
    image_path = os.path.join(input_images_dir, image_file)
    # Filename without extension becomes the master id (image_1.jpg -> image_1).
    master_id = os.path.splitext(image_file)[0]
    all_metadata.extend(extract_and_save_objects(image_path, master_id))

# Persist the metadata index as CSV.
metadata_df = pd.DataFrame(all_metadata)
metadata_df.to_csv(metadata_file, index=False)
print(f"Extraction and storage complete. Metadata saved to {metadata_file}.")
Extraction and storage complete. Metadata saved to data/metadata.csv.
Identification results are saved in both .json and .csv formats.
import os
import torch
import clip
from PIL import Image
import pandas as pd
import json
# Define directories
segmented_objects_dir = 'data/segmented_objects'  # object crops from the extraction stage
descriptions_file = 'data/descriptions.csv'
descriptions_json_file = 'data/descriptions.json'
# Candidate labels for zero-shot CLIP matching: roughly the COCO class names
# plus a few extras (flower, mobile phone, lion, tiger, ...).
descriptions = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
'truck', 'boat', 'traffic light', 'fire hydrant', 'flower', 'stop sign',
'mobile phone', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'lion', 'backpack',
'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
'knife', 'spoon', 'tiger', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'TV', 'laptop', 'mouse',
'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush']
# Load CLIP model and preprocess.
# NOTE(review): this rebinds `model` and `device`, shadowing the Mask R-CNN
# globals used by the earlier cells — confirm the cell ordering is intended.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
def identify_and_describe_object(image_path):
    """Zero-shot classify one object crop against the candidate descriptions.

    Encodes the image with the module-level CLIP ``model`` and returns the
    description whose text embedding is most similar to the image embedding.

    The tokenized prompts and their text features never change between calls,
    so they are computed once and cached on the function object instead of
    being re-tokenized and re-encoded for every image.
    """
    # Load and preprocess the image
    image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)

    # Compute (or fetch) the normalized text features for all descriptions.
    text_features = getattr(identify_and_describe_object, '_text_features', None)
    if text_features is None:
        text_inputs = clip.tokenize(descriptions).to(device)
        with torch.no_grad():
            text_features = model.encode_text(text_inputs)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)
        identify_and_describe_object._text_features = text_features

    with torch.no_grad():
        image_features = model.encode_image(image)
    image_features = image_features / image_features.norm(dim=-1, keepdim=True)

    # Scaled cosine similarity, softmaxed over the candidate labels.
    similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
    return descriptions[similarity[0].argmax().item()]
# Classify every segmented object listed in the metadata index.
metadata_file = 'data/metadata.csv'
metadata_df = pd.read_csv(metadata_file)

all_descriptions = []
for _, row in metadata_df.iterrows():
    object_image_path = row['file_path']
    label = identify_and_describe_object(object_image_path)
    all_descriptions.append({
        'master_id': row['master_id'],
        'object_id': row['object_id'],
        'file_path': object_image_path,
        'description': label,
    })

# Persist the descriptions in both tabular (CSV) and structured (JSON) form.
descriptions_df = pd.DataFrame(all_descriptions)
descriptions_df.to_csv(descriptions_file, index=False)
with open(descriptions_json_file, 'w') as json_file:
    json.dump(all_descriptions, json_file, indent=4)
print(f"Object identification complete. Descriptions saved to {descriptions_file} and {descriptions_json_file}.")
Object identification complete. Descriptions saved to data/descriptions.csv and data/descriptions.json.
import os
import cv2
import easyocr
# Define directories
input_images_dir = 'data/input_images'
text_extraction_results_file_csv = 'data/text_extraction_results.csv'
text_extraction_results_file_json = 'data/text_extraction_results.json'
# Initialize EasyOCR reader (English only; runs on CPU when no GPU is found)
reader = easyocr.Reader(['en'])
# Function to extract text from an image
def extract_text(image_path):
    """Run EasyOCR on one image and return its raw (bbox, text, confidence) results.

    Raises:
        ValueError: if the file cannot be read as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Failed to load image: {image_path}")
    return reader.readtext(img)
Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.
import pandas as pd
# Accumulate one row per detected text region.
results_list = []

# Every regular file in the input folder is OCR'd (no extension filter here).
image_files = [f for f in os.listdir(input_images_dir) if os.path.isfile(os.path.join(input_images_dir, f))]

for image_file in image_files:
    image_path = os.path.join(input_images_dir, image_file)
    try:
        for bbox, text, prob in extract_text(image_path):
            # Cast coordinates and confidence to plain floats so the rows
            # serialize cleanly to CSV/JSON later.
            results_list.append({
                'Image': image_file,
                'BBox': [[float(coord) for coord in point] for point in bbox],
                'Text': text,
                'Confidence': float(prob),
            })
    except Exception as e:
        # Best effort: report the failed image and keep going.
        print(f"Error processing {image_file}: {e}")
import json
# Tabular view of the OCR rows.
results_df = pd.DataFrame(results_list)

# Save results to CSV file.
results_df.to_csv(text_extraction_results_file_csv, index=False)
print(f"Saved text extraction results to {text_extraction_results_file_csv}")

# Save results to JSON file; non-serializable values surface as TypeError.
try:
    with open(text_extraction_results_file_json, 'w') as json_file:
        json.dump(results_list, json_file, indent=4)
    print(f"Saved text extraction results to {text_extraction_results_file_json}")
except TypeError as e:
    print(f"Failed to save JSON results: {e}")
Saved text extraction results to data/text_extraction_results.csv Saved text extraction results to data/text_extraction_results.json
import pandas as pd
import json
import os
import csv
# Define file paths
identification_results_file = 'data/descriptions.csv'
text_extraction_results_file = 'data/text_extraction_results.csv'
summary_results_file = 'data/summaries.csv'
summary_results_json_file = 'data/summaries.json'

# Load identification and text extraction results with error handling.
# `error_bad_lines` is deprecated (it raised FutureWarnings here);
# `on_bad_lines='skip'` is the supported equivalent of error_bad_lines=False
# (skip malformed rows instead of raising).
try:
    identification_df = pd.read_csv(identification_results_file, encoding='utf-8',
                                    on_bad_lines='skip', quoting=csv.QUOTE_MINIMAL)
    text_extraction_df = pd.read_csv(text_extraction_results_file, encoding='utf-8',
                                     on_bad_lines='skip', quoting=csv.QUOTE_MINIMAL)
except Exception as e:
    print(f"Error loading CSV files: {e}")
    raise

# Print column names for debugging
print("Identification DataFrame columns:", identification_df.columns)
print("Text Extraction DataFrame columns:", text_extraction_df.columns)
# Print sample rows for debugging
print("Sample rows from identification DataFrame:")
print(identification_df.head())
print("Sample rows from text extraction DataFrame:")
print(text_extraction_df.head())

# Extract filenames from file paths in identification_df
identification_df['Image'] = identification_df['file_path'].apply(lambda x: os.path.basename(x))
Identification DataFrame columns: Index(['master_id', 'object_id', 'file_path', 'description'], dtype='object')
Text Extraction DataFrame columns: Index(['Image', 'BBox', 'Text', 'Confidence'], dtype='object')
Sample rows from identification DataFrame:
master_id object_id file_path description
0 image_1 1 data/segmented_objects\image_1_1.jpg dog
1 image_2 1 data/segmented_objects\image_2_1.jpg elephant
2 image_3 1 data/segmented_objects\image_3_1.jpg tiger
3 image_3 2 data/segmented_objects\image_3_2.jpg tiger
4 image_4 1 data/segmented_objects\image_4_1.jpg cat
Sample rows from text extraction DataFrame:
Image BBox Text \
0 image_5.jpg [[28.0, 26.0], [388.0, 26.0], [388.0, 200.0], ... High
1 image_5.jpg [[27.0, 162.0], [580.0, 162.0], [580.0, 351.0]... Quality
2 image_5.jpg [[35.0, 337.0], [661.0, 337.0], [661.0, 473.0]... Pictures
3 image_5.jpg [[78.0, 532.0], [198.0, 532.0], [198.0, 670.0]... f
4 image_7.jpg [[35.0, 121.0], [305.0, 121.0], [305.0, 211.0]... PFO+
Confidence
0 0.999770
1 0.991969
2 0.987244
3 0.985653
4 0.120233
C:\Users\Rahul\AppData\Local\Temp\ipykernel_25504\394609724.py:14: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future. identification_df = pd.read_csv(identification_results_file, encoding='utf-8', error_bad_lines=False, quoting=csv.QUOTE_MINIMAL) C:\Users\Rahul\AppData\Local\Temp\ipykernel_25504\394609724.py:15: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future. text_extraction_df = pd.read_csv(text_extraction_results_file, encoding='utf-8', error_bad_lines=False, quoting=csv.QUOTE_MINIMAL)
# Check if required columns are present
required_id_columns = ['master_id', 'object_id', 'Image', 'description']
required_text_columns = ['Image', 'BBox', 'Text', 'Confidence']
for col in required_id_columns:
    if col not in identification_df.columns:
        print(f"Warning: Missing column in identification results: {col}")
for col in required_text_columns:
    if col not in text_extraction_df.columns:
        print(f"Warning: Missing column in text extraction results: {col}")

# Merge results on 'Image' column
merged_df = pd.merge(identification_df, text_extraction_df, on='Image', how='left')


def _na(value):
    """Map NaN/None merge results to the 'N/A' placeholder.

    Series.get only defaults when the *column* is missing, so NaN values
    produced by the left join must be handled explicitly.
    """
    return 'N/A' if pd.isna(value) else value


# Build the summary rows in a plain list and construct the DataFrame once:
# DataFrame.append is deprecated (it raised FutureWarnings here) and
# re-copies the whole frame on every call.
summary_rows = [
    {
        'master_id': row['master_id'],
        'object_id': row['object_id'],
        'file_path': row['file_path'],
        'description': row['description'],
        'BBox': _na(row.get('BBox')),
        'Text': _na(row.get('Text')),
        'Confidence': _na(row.get('Confidence')),
    }
    for _, row in merged_df.iterrows()
]
summary_df = pd.DataFrame(summary_rows,
                          columns=['master_id', 'object_id', 'file_path',
                                   'description', 'BBox', 'Text', 'Confidence'])
C:\Users\Rahul\AppData\Local\Temp\ipykernel_25504\4219987423.py:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
summary_df = summary_df.append({
C:\Users\Rahul\AppData\Local\Temp\ipykernel_25504\4219987423.py:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
summary_df = summary_df.append({
C:\Users\Rahul\AppData\Local\Temp\ipykernel_25504\4219987423.py:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
summary_df = summary_df.append({
C:\Users\Rahul\AppData\Local\Temp\ipykernel_25504\4219987423.py:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
summary_df = summary_df.append({
C:\Users\Rahul\AppData\Local\Temp\ipykernel_25504\4219987423.py:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
summary_df = summary_df.append({
C:\Users\Rahul\AppData\Local\Temp\ipykernel_25504\4219987423.py:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
summary_df = summary_df.append({
C:\Users\Rahul\AppData\Local\Temp\ipykernel_25504\4219987423.py:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
summary_df = summary_df.append({
C:\Users\Rahul\AppData\Local\Temp\ipykernel_25504\4219987423.py:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
summary_df = summary_df.append({
C:\Users\Rahul\AppData\Local\Temp\ipykernel_25504\4219987423.py:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
summary_df = summary_df.append({
C:\Users\Rahul\AppData\Local\Temp\ipykernel_25504\4219987423.py:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
summary_df = summary_df.append({
C:\Users\Rahul\AppData\Local\Temp\ipykernel_25504\4219987423.py:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
summary_df = summary_df.append({
# Save summary results to CSV
summary_df.to_csv(summary_results_file, index=False)
print(f"Saved summary results to {summary_results_file}")

# Save summary results to JSON, one record per row.
summary_json = summary_df.to_dict(orient='records')
with open(summary_results_json_file, 'w') as json_file:
    json.dump(summary_json, json_file, indent=4)
print(f"Saved summary results to {summary_results_json_file}")
Saved summary results to data/summaries.csv Saved summary results to data/summaries.json
import pandas as pd
import json
import os
# Define file paths
identification_results_file = 'data/descriptions.csv'
text_extraction_results_file = 'data/text_extraction_results.csv'

# Load identification and text extraction results
try:
    identification_df = pd.read_csv(identification_results_file)
    text_extraction_df = pd.read_csv(text_extraction_results_file)
except Exception as e:
    print(f"Error loading CSV files: {e}")
    raise

# Extract base image names for matching. Object files are named
# '<master_id>_<object_id>.jpg' (e.g. image_1_1.jpg) and OCR rows carry the
# original filename (e.g. image_5.jpg). The previous split('_')[0] collapsed
# every key to the literal 'image', so each object was joined to the text of
# *every* image (visible in the merged output). Stripping only the extension
# and the trailing object id keeps the per-image keys distinct.
identification_df['Base_Image'] = identification_df['file_path'].apply(
    lambda x: os.path.splitext(os.path.basename(x))[0].rsplit('_', 1)[0])
text_extraction_df['Base_Image'] = text_extraction_df['Image'].apply(
    lambda x: os.path.splitext(x)[0])
print("Data loaded and base filenames extracted.")
Data loaded and base filenames extracted.
# Merge on base image names
merged_df = pd.merge(identification_df, text_extraction_df, on='Base_Image', how='left')

# Check merged DataFrame
print("Merged DataFrame with base image names:")
print(merged_df.head(10))

# Group the merged rows by master image and nest the per-object records.
data_mapping = []
for master_id, group in merged_df.groupby('master_id'):
    object_details = [
        {
            'object_id': row['object_id'],
            'file_path': row['file_path'],
            'description': row['description'],
            'text_data': {
                # .get defaults only when the column itself is missing.
                'BBox': row.get('BBox', 'N/A'),
                'Text': row.get('Text', 'N/A'),
                'Confidence': row.get('Confidence', 'N/A')
            },
        }
        for _, row in group.iterrows()
    ]
    data_mapping.append({'master_id': master_id, 'object_details': object_details})
print("Data mapping structure prepared.")
Merged DataFrame with base image names:
master_id object_id file_path description \
0 image_1 1 data/segmented_objects\image_1_1.jpg dog
1 image_1 1 data/segmented_objects\image_1_1.jpg dog
2 image_1 1 data/segmented_objects\image_1_1.jpg dog
3 image_1 1 data/segmented_objects\image_1_1.jpg dog
4 image_1 1 data/segmented_objects\image_1_1.jpg dog
5 image_1 1 data/segmented_objects\image_1_1.jpg dog
6 image_2 1 data/segmented_objects\image_2_1.jpg elephant
7 image_2 1 data/segmented_objects\image_2_1.jpg elephant
8 image_2 1 data/segmented_objects\image_2_1.jpg elephant
9 image_2 1 data/segmented_objects\image_2_1.jpg elephant
Base_Image Image BBox \
0 image image_5.jpg [[28.0, 26.0], [388.0, 26.0], [388.0, 200.0], ...
1 image image_5.jpg [[27.0, 162.0], [580.0, 162.0], [580.0, 351.0]...
2 image image_5.jpg [[35.0, 337.0], [661.0, 337.0], [661.0, 473.0]...
3 image image_5.jpg [[78.0, 532.0], [198.0, 532.0], [198.0, 670.0]...
4 image image_7.jpg [[35.0, 121.0], [305.0, 121.0], [305.0, 211.0]...
5 image image_7.jpg [[381.0, 121.0], [665.0, 121.0], [665.0, 211.0...
6 image image_5.jpg [[28.0, 26.0], [388.0, 26.0], [388.0, 200.0], ...
7 image image_5.jpg [[27.0, 162.0], [580.0, 162.0], [580.0, 351.0]...
8 image image_5.jpg [[35.0, 337.0], [661.0, 337.0], [661.0, 473.0]...
9 image image_5.jpg [[78.0, 532.0], [198.0, 532.0], [198.0, 670.0]...
Text Confidence
0 High 0.999770
1 Quality 0.991969
2 Pictures 0.987244
3 f 0.985653
4 PFO+ 0.120233
5 SHOP 0.999455
6 High 0.999770
7 Quality 0.991969
8 Pictures 0.987244
9 f 0.985653
Data mapping structure prepared.
# Define output file path
data_mapping_file = 'data/output/data_mapping.json'

# Ensure the output directory exists
output_dir = os.path.dirname(data_mapping_file)
os.makedirs(output_dir, exist_ok=True)

# Persist the mapping under a top-level "images" key.
with open(data_mapping_file, 'w') as json_file:
    json.dump({"images": data_mapping}, json_file, indent=4)
print(f"Saved data mapping to {data_mapping_file}")
Saved data mapping to data/output/data_mapping.json
import pandas as pd
import json
import os
# Define file paths
data_mapping_file = 'data/output/data_mapping.json'
summary_results_file = 'data/summaries.csv'
original_images_folder = 'data/input_images/'
output_dir = 'data/output/table_and_annotated/'

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Load the nested per-image mapping and the flat summary table.
with open(data_mapping_file, 'r') as json_file:
    data_mapping = json.load(json_file)['images']
summary_df = pd.read_csv(summary_results_file)
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from PIL import Image
def plot_image_with_annotations(image_path, objects, output_path):
    """Draw OCR bounding boxes and labels over one image and save the figure.

    Each object's BBox is a JSON string of four corner points; missing or
    unparsable boxes are reported and skipped rather than aborting the plot.
    """
    try:
        img = Image.open(image_path)
    except FileNotFoundError:
        print(f"Image not found: {image_path}")
        return

    fig, ax = plt.subplots(1, figsize=(12, 8))
    ax.imshow(img)

    for obj in objects:
        bbox_data = obj['text_data']['BBox']
        # Skip objects that have no usable box.
        if not (pd.notna(bbox_data) and bbox_data not in ['N/A', 'NaN']):
            continue
        try:
            bbox = json.loads(bbox_data)  # JSON string -> list of points
            if isinstance(bbox, list) and len(bbox) == 4:
                # Uses the first and third corner points as the box extent
                # (assumes corners ordered tl, tr, br, bl — TODO confirm).
                x_min, y_min = bbox[0][0], bbox[0][1]
                x_max, y_max = bbox[2][0], bbox[2][1]
                rect = Rectangle((x_min, y_min), x_max - x_min, y_max - y_min,
                                 linewidth=2, edgecolor='red', facecolor='none')
                ax.add_patch(rect)
                # Label the box with the class description and the OCR text.
                annotation_text = f"{obj['description']} ({obj['text_data']['Text']})"
                plt.text(x_min, y_min - 10, annotation_text,
                         bbox=dict(facecolor='yellow', alpha=0.5), fontsize=8, color='black')
            else:
                print(f"Invalid BBox format for object {obj['object_id']} in {obj['file_path']}: {bbox}")
        except (json.JSONDecodeError, KeyError, IndexError) as e:
            print(f"Error parsing BBox for object {obj['object_id']} in {obj['file_path']}: {e}")

    plt.axis('off')
    plt.savefig(output_path, bbox_inches='tight', pad_inches=0)
    plt.close()
def generate_summary_table(objects, csv_output_path, json_output_path):
    """Write one image's object records to CSV and JSON summary files."""
    summary_data = [
        {
            'Object ID': obj['object_id'],
            'File Path': obj['file_path'],
            'Description': obj['description'],
            'BBox': obj['text_data']['BBox'],
            'Text': obj['text_data']['Text'],
            'Confidence': obj['text_data']['Confidence'],
        }
        for obj in objects
    ]
    summary_df = pd.DataFrame(summary_data)

    # Save as CSV
    summary_df.to_csv(csv_output_path, index=False)
    print(f"Saved summary table to CSV: {csv_output_path}")

    # Save as JSON
    with open(json_output_path, 'w') as json_file:
        json.dump(summary_data, json_file, indent=4)
    print(f"Saved summary table to JSON: {json_output_path}")
# Produce the annotated image and summary tables for every master image.
for entry in data_mapping:
    master_id = entry['master_id']
    objects = entry['object_details']

    # Original image path derived from the master id.
    base_image_name = f"{master_id}.jpg"
    original_image_path = os.path.join(original_images_folder, base_image_name)
    if not os.path.exists(original_image_path):
        print(f"Image not found: {original_image_path}")
        continue

    # Per-image output paths
    annotated_image_path = os.path.join(output_dir, f"annotated_{base_image_name}")
    summary_table_csv_path = os.path.join(output_dir, f"summary_{master_id}.csv")
    summary_table_json_path = os.path.join(output_dir, f"summary_{master_id}.json")

    plot_image_with_annotations(original_image_path, objects, annotated_image_path)
    generate_summary_table(objects, summary_table_csv_path, summary_table_json_path)

print(f"Annotated images and summary tables saved in {output_dir}")
Saved summary table to CSV: data/output/table_and_annotated/summary_image_1.csv Saved summary table to JSON: data/output/table_and_annotated/summary_image_1.json Saved summary table to CSV: data/output/table_and_annotated/summary_image_2.csv Saved summary table to JSON: data/output/table_and_annotated/summary_image_2.json Saved summary table to CSV: data/output/table_and_annotated/summary_image_3.csv Saved summary table to JSON: data/output/table_and_annotated/summary_image_3.json Saved summary table to CSV: data/output/table_and_annotated/summary_image_4.csv Saved summary table to JSON: data/output/table_and_annotated/summary_image_4.json Saved summary table to CSV: data/output/table_and_annotated/summary_image_5.csv Saved summary table to JSON: data/output/table_and_annotated/summary_image_5.json Saved summary table to CSV: data/output/table_and_annotated/summary_image_6.csv Saved summary table to JSON: data/output/table_and_annotated/summary_image_6.json Saved summary table to CSV: data/output/table_and_annotated/summary_image_7.csv Saved summary table to JSON: data/output/table_and_annotated/summary_image_7.json Annotated images and summary tables saved in data/output/table_and_annotated/